---
title: "Experimental Analysis and Robustness Checks"
author: "Erica Ann Metheney and Lauren Yehle"
header-includes:
    - \usepackage{float}
    - \usepackage{array}
    - \usepackage{multirow}
    - \usepackage{multicol}
    - \usepackage{longtable}
    - \usepackage{dcolumn}
date: \today{}
output:
  pdf_document:
      fig_caption: true
      number_sections: true
      keep_tex: true
      keep_md: yes
classoption: landscape
geometry: margin=2cm
---

```{r setup, include=FALSE}
require("knitr")
require("summarytools")
knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message = FALSE, comment=NA,prompt = FALSE, cache = FALSE, results = 'asis',fig.width = 8,fig.height = 6,echo = FALSE,results = 'asis',tidy.opts = list(width.cutoff = 60), tidy = TRUE, dev = "png")
opts_knit$set(root.dir = "Experiment 1 Quantitative")

st_options(bootstrap.css     = FALSE,       # Already part of the theme so no need for it
           plain.ascii       = FALSE,       # One of the essential settings
           style             = "rmarkdown", # Idem.
           dfSummary.silent  = TRUE,        # Suppresses messages about temporary files
           footnote          = NA,          # Keeping the results minimalistic
           subtitle.emphasis = FALSE)       # For the vignette theme, this gives better results.
                                            # For other themes, using TRUE might be preferable.

st_css()
```

```{r required_libraries}
library(readxl)
library(plyr)
library(dplyr)
library(lme4)
library(ggplot2)
library(ggmosaic)
library(nlme)
library(merTools)
library(lmerTest)
library(kableExtra)
library(forcats)
library(stargazer)
library(readxl)
library(interactions)
library(formatR)
library(texreg)
```

```{r user_functions}
mlm.fixed.effects.table <- function(fit,title){
  results = as.data.frame(summary(fit)$coefficients)
  results = results %>% dplyr::mutate(Star = cut(results[,5],breaks = c(0,0.001,0.01,0.05,1.1),right = FALSE))
  results$Star = mapvalues(results$Star,from = levels(results$Star), to = c("***","**","*",""))
  results = results[,-c(3,4)]
  print(kable(results,caption = paste("MLM Fixed Component Results: ", title ,"(n = ",nobs(fit),")",sep = "")) %>% column_spec(1,background = ifelse(summary(fit)$coefficients[,5] <0.05, "#79C9C6", "#C1BBB5"))%>%
  kable_styling(latex_options = "HOLD_position"))
}

mlm.fixed.effects.table.log <- function(fit,title){
  results = as.data.frame(summary(fit)$coefficients)
  results = results %>% dplyr::mutate(Star = cut(results[,4],breaks = c(0,0.001,0.01,0.05,1.1),right = FALSE))
  results$Star = mapvalues(results$Star,from = levels(results$Star), to = c("***","**","*",""))
  results = results[,-c(3)]
  print(kable(results,caption = paste("MLM Fixed Component Results: ", title ,"(n = ",nobs(fit),")",sep = "")) %>% column_spec(1,background = ifelse(summary(fit)$coefficients[,4] <0.05, "#79C9C6", "#C1BBB5"))%>%
  kable_styling(latex_options = "HOLD_position"))
}

mlm.random.effects.table <- function(fit,title){
  results = as.data.frame(VarCorr(fit))
  results = subset(results,select = c("grp","var1","vcov","sdcor"))
  print(kable(results,caption = paste("MLM Random Component Results: ", title ,"(n = ",nobs(fit),")",sep = "")) %>% column_spec(1,background = ifelse(results$vcov >0 & results$grp == "Question", "#79C9C6", "#C1BBB5"))%>%
  kable_styling(latex_options = "HOLD_position"))
  
}
```

```{r}
#Qualitative Codes
dat <- read_excel("coded_AI_output.xlsx")
trtID <- read_excel("GPTresults_with_Treatment.xlsx")

dat$StatDes = paste(dat$Statement,dat$Description,sep = " ")
trtID$StatDes = paste(trtID$Statement,trtID$Description,sep = " ")

#Remove Treatment with Error
indx.trtID = which(trtID$Treatment == "E1M1P2G8")
trtID = trtID[-indx.trtID,]

indx.dat = c(7526,5316,4839,2666,370)
dat = subset(dat,!(AnonID %in% indx.dat))

#Merge
dat = merge(dat,trtID,by = "StatDes")

#Clean Up
dat$Statement = dat$Statement.x
dat$Description = dat$Description.x

```


```{r data_cleaning}
#Breakdown ID into Constituent Parts
dat$StatementID = substr(dat$ID, nchar(dat$ID)-1,nchar(dat$ID))
dat$TrtQ = sub("S[^S]*$", "", dat$ID)
dat$Model = substr(dat$TrtQ,3,4)
dat$Persona = substr(dat$TrtQ,5,6)
dat$Question = substring(dat$TrtQ,7)

dat$Source = ifelse(startsWith(dat$Question,"L"),"LGPI",NA)
dat$Source[startsWith(dat$Question,"W")]<- "WVS"
dat$Source[startsWith(dat$Question,"G")]<- "Gallup"
dat$Source[startsWith(dat$Question,"Q")]<- "QQ"
dat$Source = factor(dat$Source, levels = c("QQ","LGPI","WVS","Gallup"))

#Fix TrtQ Error
dat$TrtQ[which(dat$AnonID == 2817)] <- "E1M2P3L122"
dat$TrtQ[which(dat$AnonID == 6370)] <- "E1M2P3L122"
dat$TrtQ[which(dat$AnonID == 565)] <- "E1M2P3L122"
dat$TrtQ[which(dat$AnonID == 186)] <- "E1M2P3L122"
dat$TrtQ[which(dat$AnonID == 2525)] <- "E1M2P3L122"

#Condense Responses within Statements
df = dat %>% group_by(TrtQ) %>% dplyr::summarize(`Code 1` = max(`1 Vague Term or Phrase`,na.rm = TRUE),
                                                 `Code 2` = max(`2 Specialized Knowledge`,na.rm = TRUE),
                                                 `Code 3` = max(`3 Syntax Problem`,na.rm = TRUE),
                                                 `Code 4` = max(`4 Unfair Presumption`,na.rm = TRUE),
                                                 `Code 5` = max(`5 Double Barreled`,na.rm = TRUE),
                                                 `Code 6` = max(`6 Undefined or Ill Reference Period`,na.rm = TRUE),
                                                 `Code 7` = max(`7 Difficult Recall or reference period`,na.rm = TRUE),
                                                 `Code 8` = max(`8 Complex Estimation/Computation`,na.rm = TRUE),
                                                 `Code 9` = max(`9 Sensitive`,na.rm = TRUE),
                                                 `Code 10` = max(`10 Leading/Biasing`,na.rm = TRUE),
                                                 `Code 11` = max(`11 Answer Set`, na.rm = TRUE),
                                                 SysVar= max(`Internal: Systematic Variation`, na.rm = TRUE),
                                                 NOTA = max(`None of the Above`,na.rm = TRUE),
                                                 TrtQ = unique(TrtQ),
                                                 Model = unique(Model),
                                                 Persona = unique(Persona),
                                                 Question = unique(Question),
                                                 Source = unique(Source))
df2 = dat %>% group_by(TrtQ) %>% dplyr::summarize(Count = n())

df[df==-Inf]<- 0
df = df %>% dplyr::mutate(NumCodesTrtQ = `Code 1` + `Code 2`+ `Code 3` + `Code 4` + `Code 5` + `Code 6` + `Code 7` + `Code 8` + `Code 9` + `Code 10` + `Code 11`)

df$NOTA2 = ifelse(df$NOTA == 1|df$SysVar == 1,1,0)
```


# Total Number of Unique Codes Per Treatment-Question Pair

```{r total_num_reg}
fit.mlm <- lmer(NumCodesTrtQ ~ Model + Persona + (1|Question),data = df)
texreg(fit.mlm,custom.model.names = "Number of Codes",caption = "Multilevel Regression Results for Number of Codes Per Treatment-Question Pair",label = "tab:NumCodesTrtQ",caption.above = TRUE)
```

\newpage

# Likelihood of Code Per Treatement-Question Pair

We fit both linear probability and logistic multilevel models for each code. 

```{r code_reg}
fit1 <- lmer(`Code 1` ~ Model + Persona + (1|Question),data = df)
fit1.log <- glmer(`Code 1`~Model+Persona +(1|Question),family = binomial(link = "logit"),data = df)
fit2 <- lmer(`Code 2` ~ Model + Persona + (1|Question),data = df)
fit2.log <- glmer(`Code 2`~Model+Persona +(1|Question),family = binomial(link = "logit"),data = df)
fit3 <- lmer(`Code 3` ~ Model + Persona + (1|Question),data = df)
fit3.log <- glmer(`Code 3`~Model+Persona +(1|Question),family = binomial(link = "logit"),data = df)
fit4 <- lmer(`Code 4` ~ Model + Persona + (1|Question),data = df)
fit4.log <- glmer(`Code 4`~Model+Persona +(1|Question),family = binomial(link = "logit"),data = df)
fit5 <- lmer(`Code 5`~ Model + Persona + (1|Question),data = df)
fit5.log <- glmer(`Code 5`~Model+Persona +(1|Question),family = binomial(link = "logit"),data = df)
fit6 <- lmer(`Code 6` ~ Model + Persona + (1|Question),data = df)
fit6.log <- glmer(`Code 6`~Model+Persona +(1|Question),family = binomial(link = "logit"),data = df)
fit7 <- lmer(`Code 7` ~ Model + Persona + (1|Question),data = df)
fit7.log <- glmer(`Code 7`~Model+Persona +(1|Question),family = binomial(link = "logit"),data = df)
fit8 <- lmer(`Code 8` ~ Model + Persona + (1|Question),data = df)
fit8.log <- glmer(`Code 8`~Model+Persona +(1|Question),family = binomial(link = "logit"),data = df)
fit9 <- lmer(`Code 9` ~ Model + Persona + (1|Question),data = df)
fit9.log <- glmer(`Code 9`~Model+Persona +(1|Question),family = binomial(link = "logit"),data = df)
fit10 <- lmer(`Code 10` ~ Model + Persona + (1|Question),data = df)
fit10.log <- glmer(`Code 10`~Model+Persona +(1|Question),family = binomial(link = "logit"),data = df)
fit11 <- lmer(`Code 11` ~ Model + Persona + (1|Question),data = df)
fit11.log <- glmer(`Code 11`~Model+Persona +(1|Question),family = binomial(link = "logit"),data = df)
fitSysVar <- lmer(SysVar ~ Model + Persona + (1|Question),data = df)
fitSysVar.log <- glmer(SysVar~Model+Persona +(1|Question),family = binomial(link = "logit"),data = df)

fitNOTA <- lmer(NOTA2 ~ Model + Persona + (1|Question),data = df)
fitNOTA.log <- glmer(NOTA2~Model+Persona +(1|Question),family = binomial(link = "logit"),data = df)

texreg(list(fit1,fit2,fit3,fit4,fit5,fit6),custom.model.names = paste("Code",1:6,sep = " "),caption = "Multilevel Regression Results for Codes 1 to 5",label = "tab:RegCode1to5",caption.above = TRUE)
texreg(list(fit7,fit8,fit9,fit10,fit11,fitSysVar),custom.model.names = c(paste("Code",7:11,sep = " "),"SysVar"),caption = "Multilevel Regression Results for Codes 6 to 11",label = "tab:RegCode6to11",caption.above = TRUE)
texreg(fitNOTA,custom.model.names = "NOTA",caption = "Multilevel Regression Results the Code NOTA (None of the Above)",label = "tab:RegNOTA",caption.above = TRUE)

texreg(list(fit1.log,fit2.log,fit3.log,fit4.log,fit5.log,fit6.log),custom.model.names = paste("Code",1:6,sep = " "),caption = "Multilevel Logistic Regression Results for Codes 1 to 6",label = "tab:RegCode1to5log",caption.above = TRUE)
texreg(list(fit7.log,fit8.log,fit9.log,fit10.log,fit11.log,fitSysVar.log),custom.model.names = c(paste("Code",7:11,sep = " "),"SysVar"),caption = "Multilevel Logistic Regression Results for Codes 7 to 11 SysVar",label = "tab:RegCode6to11log",caption.above = TRUE)
texreg(fitNOTA.log, custom.model.names = "NOTA", caption = "Multilevel Logistic Regression Results the Code NOTA (None of the Above)",label = "tab:RegNOTAlog",caption.above = TRUE)


texreg(list(fit1,fit2,fit3,fit4,fit5,fit6,fit7,fit8,fit9,fit10,fit11,fitSysVar),custom.model.names = c(paste("Code",1:11,sep = " "),"SysVar"),caption = "Multilevel Regression Results for Codes 1 to 11",label = "tab:RegCode1to11",caption.above = TRUE)

models = list(fit1, fit2, fit3, fit4, fit5, fit6, fit7, fit8, fit9, fit10, fit11)
models.log = list(fit1.log, fit2.log, fit3.log, fit4.log, fit5.log, fit6.log, fit7.log, fit8.log, fit9.log, fit10.log, fit11.log)
```

## Holm's Corrected Tables - LMP

```{r MTCmodel}
library(broom.mixed)
#Get p-values for Model effect (M2)
target_term <- "ModelM2"
raw_pvals <- sapply(models, function(model) {
  tidy(model, effects = "fixed") |>
    dplyr::filter(term == target_term) |>
    dplyr::pull(p.value)
})

# Apply Holm correction
pvals_adj <- p.adjust(raw_pvals, method = "holm")
pvals_adj_M2 <- p.adjust(raw_pvals, method = "holm")


# Update regression tables
make_texreg_partial_correction <- function(model, adjusted_pval, target_term) {
  tidy_df <- tidy(model, effects = "fixed")
  
  # Replace the p-value for the term of interest
  tidy_df$p.value[tidy_df$term == target_term] <- adjusted_pval

  # Construct texreg object
  texreg::createTexreg(
    coef.names = tidy_df$term,
    coef = tidy_df$estimate,
    se = tidy_df$std.error,
    pvalues = tidy_df$p.value
  )
}

# Apply across all models
texreg_list <- mapply(
  make_texreg_partial_correction,
  model = models,
  adjusted_pval = pvals_adj,
  MoreArgs = list(target_term = target_term),
  SIMPLIFY = FALSE
)



# Output with corrected tables
texreg(texreg_list, custom.model.names = paste0("Code ", 1:11),caption = "Regression Results with Holmes Multiple Testing Correction on Model Effect Only",caption.above = TRUE)
```

```{r MTCmodelpersona2}
library(broom.mixed)
#Get p-values for Persona effect (P2)
target_term <- "PersonaP2"
raw_pvals <- sapply(models, function(model) {
  tidy(model, effects = "fixed") |>
    dplyr::filter(term == target_term) |>
    dplyr::pull(p.value)
})

# Apply Holm correction
pvals_adj <- p.adjust(raw_pvals, method = "holm")
pvals_adj_P2 <- p.adjust(raw_pvals, method = "holm")


# Update regression tables
make_texreg_partial_correction <- function(model, adjusted_pval, target_term) {
  tidy_df <- tidy(model, effects = "fixed")
  
  # Replace the p-value for the term of interest
  tidy_df$p.value[tidy_df$term == target_term] <- adjusted_pval

  # Construct texreg object
  texreg::createTexreg(
    coef.names = tidy_df$term,
    coef = tidy_df$estimate,
    se = tidy_df$std.error,
    pvalues = tidy_df$p.value
  )
}

# Apply across all models
texreg_list <- mapply(
  make_texreg_partial_correction,
  model = models,
  adjusted_pval = pvals_adj,
  MoreArgs = list(target_term = target_term),
  SIMPLIFY = FALSE
)



# Output with corrected tables
texreg(texreg_list, custom.model.names = paste0("Code ", 1:11),caption = "Regression Results with Holmes Multiple Testing Correction on Persona Effect (P2 = Survey Design Expert) Only",caption.above = TRUE)


```

```{r MTCmodelpersona3}
library(broom.mixed)
#Get p-values for Persona effect (P3)
target_term <- "PersonaP3"
raw_pvals <- sapply(models, function(model) {
  tidy(model, effects = "fixed") |>
    dplyr::filter(term == target_term) |>
    dplyr::pull(p.value)
})

# Apply Holm correction
pvals_adj <- p.adjust(raw_pvals, method = "holm")
pvals_adj_P3 <- p.adjust(raw_pvals, method = "holm")



# Update regression tables
make_texreg_partial_correction <- function(model, adjusted_pval, target_term) {
  tidy_df <- tidy(model, effects = "fixed")
  
  # Replace the p-value for the term of interest
  tidy_df$p.value[tidy_df$term == target_term] <- adjusted_pval

  # Construct texreg object
  texreg::createTexreg(
    coef.names = tidy_df$term,
    coef = tidy_df$estimate,
    se = tidy_df$std.error,
    pvalues = tidy_df$p.value
  )
}

# Apply across all models
texreg_list <- mapply(
  make_texreg_partial_correction,
  model = models,
  adjusted_pval = pvals_adj,
  MoreArgs = list(target_term = target_term),
  SIMPLIFY = FALSE
)



# Output with corrected tables
texreg(texreg_list, custom.model.names = paste0("Code ", 1:11),caption = "Regression Results with Holmes Multiple Testing Correction on Persona Effect (P3 = Linguist) Only",caption.above = TRUE)
```

## Holm's Corrected Tables - Logistic

```{r MTCmodellog}
library(broom.mixed)
#Get p-values for Model effect (M2)
target_term <- "ModelM2"
raw_pvals <- sapply(models.log, function(model) {
  tidy(model, effects = "fixed") |>
    dplyr::filter(term == target_term) |>
    dplyr::pull(p.value)
})

# Apply Holm correction
pvals_adj <- p.adjust(raw_pvals, method = "holm")
pvals_adj_M2 <- p.adjust(raw_pvals, method = "holm")


# Update regression tables
make_texreg_partial_correction <- function(model, adjusted_pval, target_term) {
  tidy_df <- tidy(model, effects = "fixed")
  
  # Replace the p-value for the term of interest
  tidy_df$p.value[tidy_df$term == target_term] <- adjusted_pval

  # Construct texreg object
  texreg::createTexreg(
    coef.names = tidy_df$term,
    coef = tidy_df$estimate,
    se = tidy_df$std.error,
    pvalues = tidy_df$p.value
  )
}

# Apply across all models
texreg_list <- mapply(
  make_texreg_partial_correction,
  model = models.log,
  adjusted_pval = pvals_adj,
  MoreArgs = list(target_term = target_term),
  SIMPLIFY = FALSE
)



# Output with corrected tables
texreg(texreg_list, custom.model.names = paste0("Code ", 1:11),caption = "Logistic Regression Results with Holmes Multiple Testing Correction on Model Effect Only",caption.above = TRUE)
```

```{r MTCmodelpersona2log}
library(broom.mixed)
#Get p-values for Persona effect (P2)
target_term <- "PersonaP2"
raw_pvals <- sapply(models.log, function(model) {
  tidy(model, effects = "fixed") |>
    dplyr::filter(term == target_term) |>
    dplyr::pull(p.value)
})

# Apply Holm correction
pvals_adj <- p.adjust(raw_pvals, method = "holm")
pvals_adj_P2 <- p.adjust(raw_pvals, method = "holm")


# Update regression tables
make_texreg_partial_correction <- function(model, adjusted_pval, target_term) {
  tidy_df <- tidy(model, effects = "fixed")
  
  # Replace the p-value for the term of interest
  tidy_df$p.value[tidy_df$term == target_term] <- adjusted_pval

  # Construct texreg object
  texreg::createTexreg(
    coef.names = tidy_df$term,
    coef = tidy_df$estimate,
    se = tidy_df$std.error,
    pvalues = tidy_df$p.value
  )
}

# Apply across all models
texreg_list <- mapply(
  make_texreg_partial_correction,
  model = models.log,
  adjusted_pval = pvals_adj,
  MoreArgs = list(target_term = target_term),
  SIMPLIFY = FALSE
)



# Output with corrected tables
texreg(texreg_list, custom.model.names = paste0("Code ", 1:11),caption = "Logistic Regression Results with Holmes Multiple Testing Correction on Persona Effect (P2 = Survey Design Expert) Only",caption.above = TRUE)


```

```{r MTCmodelpersona3log}
library(broom.mixed)
#Get p-values for Persona effect (P3)
target_term <- "PersonaP3"
raw_pvals <- sapply(models.log, function(model) {
  tidy(model, effects = "fixed") |>
    dplyr::filter(term == target_term) |>
    dplyr::pull(p.value)
})

# Apply Holm correction
pvals_adj <- p.adjust(raw_pvals, method = "holm")
pvals_adj_P3 <- p.adjust(raw_pvals, method = "holm")



# Update regression tables
make_texreg_partial_correction <- function(model, adjusted_pval, target_term) {
  tidy_df <- tidy(model, effects = "fixed")
  
  # Replace the p-value for the term of interest
  tidy_df$p.value[tidy_df$term == target_term] <- adjusted_pval

  # Construct texreg object
  texreg::createTexreg(
    coef.names = tidy_df$term,
    coef = tidy_df$estimate,
    se = tidy_df$std.error,
    pvalues = tidy_df$p.value
  )
}

# Apply across all models
texreg_list <- mapply(
  make_texreg_partial_correction,
  model = models.log,
  adjusted_pval = pvals_adj,
  MoreArgs = list(target_term = target_term),
  SIMPLIFY = FALSE
)



# Output with corrected tables
texreg(texreg_list, custom.model.names = paste0("Code ", 1:11),caption = "Logistic Regression Results with Holmes Multiple Testing Correction on Persona Effect (P3 = Linguist) Only",caption.above = TRUE)
```

```{r}
texreg(list(fit1.log,fit2.log,fit3.log,fit4.log,fit5.log,fit6.log,fit7.log,fit8.log,fit9.log,fit10.log,fit11.log),custom.model.names = paste("Code",1:11,sep = " "),caption = "Logistic Multilevel Regression Results for Codes 1 to 11",label = "tab:LogRegCode1to11",caption.above = TRUE)
```


